Required packages: numpy, scipy, matplotlib, seaborn, scikit-learn, PyWavelets (pywt), pandas, statsmodels.
Up to section $\quad$ Return to contents
import sys
sys.path.append('../src/')
import os
import copy
import re
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None) # to show non-truncated version of table
pd.set_option('display.max_colwidth', -1)
import pywt
from ml_classes import dwtLDA
from ml_classes import dwtLR
from ml_classes import dwtAC
from ml_classes import dwtSC
from nonlinearity import sign_thresh, hos_thresh
from alg_stats import get_gscv_stats
from alg_stats import GridSearchCV_unsupervised
import scipy.optimize
from files_io import ftirLoad
from sklearn.metrics import accuracy_score
from sklearn.metrics import fowlkes_mallows_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from statsmodels.graphics.mosaicplot import mosaic
import matplotlib as mpl
mpl.use("pgf")
mpl.rcParams.update({
"text.usetex": True,
"pgf.texsystem": "xelatex",
"pgf.preamble": [
r'\usepackage{soul}',
]
})
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import matplotlib.cm as cm
%matplotlib inline
#%matplotlib notebook
Up to section $\quad$ Return to contents
# --- cross-validation set-up -------------------------------------------------
# folds per repetition / number of repetitions for RepeatedStratifiedKFold
n_splits, n_repeats = 4, 25
# number of parallel jobs handed to GridSearchCV
nJobsGSCV = 2
# upper limit applied to the FTIR wavenumber axis (cm^{-1})
bandLim = 2000.
# seeds of the "long" (repeated) and "short" (single repetition) CV splitters
random_state_cv_long, random_state_cv_short = 235, 578

# --- styling of the train-vs-test scatter plot --------------------------------
# one colour per on/off combination of abs/mean/std => 2^3 = 8
colours_trte = cm.rainbow(np.linspace(0, 1, 8))
# marker shape per plotted data variant (f, f', f'' in the legend below)
markers_trte = dict(original='o', d1='s', d2='^')

# Legend handles: eight colour patches (colour slot 7 first, slot 0 last)
# followed by three black marker samples.
legend_elements_trte = list(
    Patch(facecolor=colours_trte[slot], label=caption)
    for slot, caption in (
        (7, r'$\overline{\mathrm{abs}}$/$\overline{\mathrm{mean}}$/$\overline{\mathrm{std}}$'),
        (6, r'$\overline{\mathrm{abs}}$/$\overline{\mathrm{mean}}$/std'),
        (5, r'$\overline{\mathrm{abs}}$/mean/$\overline{\mathrm{std}}$'),
        (4, r'$\overline{\mathrm{abs}}$/mean/std'),
        (3, r'abs/$\overline{\mathrm{mean}}$/$\overline{\mathrm{std}}$'),
        (2, r'abs/$\overline{\mathrm{mean}}$/std'),
        (1, r'abs/mean/$\overline{\mathrm{std}}$'),
        (0, r'abs/mean/std'),
    )
)
legend_elements_trte.extend(
    Line2D(
        [0], [0], marker=markers_trte[kind], color='w', label=caption,
        markerfacecolor='black', markersize=10
    )
    for kind, caption in (
        ('original', r'$f$'),
        ('d1', r'$f^{\prime}$'),
        ('d2', r'$f^{\prime\prime}$'),
    )
)

# colours of the grouped bar plots, consumed pairwise per data variant
barplot_palette = [
    '#CC3333',
    '#FF8C69',
    '#F2BC14',
    '#FFE8A0',
    '#046969',
    '#528B8B',
]
Up to section $\quad$ Return to contents
def stringSplitByNumbers(x):
    '''
    Build a natural-order sort key for the string ``x``.

    The string is split into alternating text and digit runs; digit runs
    are converted to ``int`` so that e.g. ``'f_2'`` sorts before ``'f_10'``.

    Adapted from a comment at
    http://code.activestate.com/recipes/135435-sort-a-string-using-numeric-order/

    Returns a list that alternates ``str`` and ``int`` items, always starting
    with a (possibly empty) ``str``.
    '''
    pieces = re.split(r'(\d+)', x)
    # With one capturing group, re.split puts the captured digit runs at the
    # odd positions.  Converting by position (instead of str.isdigit) avoids
    # int() failures on characters such as superscript digits, which
    # isdigit() accepts but \d never captures.
    return [int(piece) if position % 2 else piece
            for position, piece in enumerate(pieces)]
def df2vec3(df, which='mean'):
    """
    Collapse per-split CV scores of a ``cv_results_``-style table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``params`` column holding the textual representation
        of each parameter dict, plus ``split*_train_score`` and
        ``split*_test_score`` columns.
    which : {'mean', 'median'}
        Statistic used to aggregate the split columns of every row.

    Returns
    -------
    rv_train, rv_test : numpy.ndarray
        Aggregated train / test score, one value per row of ``df``.
    param : pandas.Series
        Evaluated ``params`` entries.

    Raises
    ------
    ValueError
        If ``which`` is neither ``'mean'`` nor ``'median'``.
    """
    if which == 'mean':
        aggregate = np.mean
    elif which == 'median':
        aggregate = np.median
    else:
        raise ValueError("which must be 'mean' or 'median', got %r" % (which,))

    # NOTE(review): eval() executes whatever text is stored in the 'params'
    # column.  Only use this on result files you produced yourself.
    param = df['params'].map(eval)

    # Real lists (not lazy filter objects) so the column selection below
    # works on both Python 2 and Python 3.
    split_keys = [key for key in df.columns.values if key.startswith('split')]

    def score_columns(suffix):
        return [
            key for key in split_keys
            if key.endswith(suffix) and not (('mean' in key) or ('std' in key))
        ]

    test_score = df[score_columns('test_score')].values
    train_score = df[score_columns('train_score')].values

    rv_train = aggregate(train_score, axis=1)
    rv_test = aggregate(test_score, axis=1)
    return rv_train, rv_test, param
def df2vec3_fromDirectory(dirname, filename_base, which='mean'):
    """
    Merge all CSV result files ``<filename_base>*`` found in ``dirname`` and
    aggregate their per-split scores with ``df2vec3``.

    Files are processed in natural (numeric-aware) filename order.  Every
    file is indexed by its ``params`` column and the ``split*`` /
    ``*train_score`` columns of all files are concatenated side by side.

    Parameters
    ----------
    dirname : str
        Directory holding the CSV files.
    filename_base : str
        Filename prefix selecting the files to merge.
    which : {'mean', 'median'}
        Forwarded to ``df2vec3``.

    Returns
    -------
    Whatever ``df2vec3`` returns: ``(rv_train, rv_test, param)``.

    Raises
    ------
    ValueError
        If no file in ``dirname`` starts with ``filename_base``.
    """
    filenames = [
        name for name in os.listdir(dirname) if name.startswith(filename_base)
    ]
    if not filenames:
        # Previously this surfaced as an opaque pandas/NameError failure.
        raise ValueError(
            'no result files starting with %r found in %r' % (filename_base, dirname)
        )
    filenames.sort(key=stringSplitByNumbers)

    frames = []
    for filename in filenames:
        # os.path.join also works when dirname has no trailing separator
        df = pd.read_csv(os.path.join(dirname, filename))
        keys = [
            key for key in df.columns.values
            if key.startswith('split') or key.endswith('train_score')
        ]
        df.set_index('params', drop=True, append=False, inplace=True, verify_integrity=False)
        frames.append(df[keys])

    df_composed = pd.concat(frames, axis=1)
    # Rows of the concatenated frame are labelled by their params string, so
    # take the labels from that frame itself rather than from the last file.
    df_composed['params'] = df_composed.index
    return df2vec3(df_composed, which)
def get_indices_lda_dwt(
        gparams, th, with_abs, with_mean, with_std,
        wavelet_family=None, wavelet_name=None, mode=None):
    """
    Build a boolean mask over ``gparams`` selecting one DWT+LDA configuration.

    Parameters
    ----------
    gparams : iterable of dict
        Parameter dicts, one per grid point.
    th, with_abs, with_mean, with_std
        Required values of the equally named entries.
    wavelet_family : str, optional
        If given, ``wavelet_name`` must start with this prefix.
    wavelet_name : str, optional
        If given, ``wavelet_name`` must equal this value.
    mode : str, optional
        If given, ``mode`` must equal this value.

    Returns
    -------
    list of bool
        ``True`` where the grid point satisfies every requested criterion.
    """
    def selected(params):
        if not (
            (params['th'] == th) and
            (params['with_abs'] == with_abs) and
            (params['with_mean'] == with_mean) and
            (params['with_std'] == with_std)
        ):
            return False
        if wavelet_family is not None and not params['wavelet_name'].startswith(wavelet_family):
            return False
        if wavelet_name is not None and not (params['wavelet_name'] == wavelet_name):
            return False
        if mode is not None and not (params['mode'] == mode):
            return False
        return True

    # A list comprehension (not map) so the result is a real list on both
    # Python 2 and Python 3 and can be used directly as a boolean mask.
    return [selected(params) for params in gparams]
def get_indices_lr_dwt(
        gparams, th, with_abs, with_mean, with_std,
        penalty=None, wavelet_family=None, wavelet_name=None, mode=None
):
    """
    Build a boolean mask over ``gparams`` selecting one DWT+LR configuration.

    Parameters
    ----------
    gparams : iterable of dict
        Parameter dicts, one per grid point.
    th, with_abs, with_mean, with_std
        Required values of the equally named entries.
    penalty : str, optional
        If given, ``penalty`` must equal this value.
    wavelet_family : str, optional
        If given, ``wavelet_name`` must start with this prefix.
    wavelet_name : str, optional
        If given, ``wavelet_name`` must equal this value.
    mode : str, optional
        If given, ``mode`` must equal this value.

    Returns
    -------
    list of bool
        ``True`` where the grid point satisfies every requested criterion.
    """
    def selected(params):
        if not (
            (params['th'] == th) and
            (params['with_abs'] == with_abs) and
            (params['with_mean'] == with_mean) and
            (params['with_std'] == with_std)
        ):
            return False
        if penalty is not None and not (params['penalty'] == penalty):
            return False
        if wavelet_family is not None and not params['wavelet_name'].startswith(wavelet_family):
            return False
        if wavelet_name is not None and not (params['wavelet_name'] == wavelet_name):
            return False
        if mode is not None and not (params['mode'] == mode):
            return False
        return True

    # A list comprehension (not map) so the result is a real list on both
    # Python 2 and Python 3 and can be used directly as a boolean mask.
    return [selected(params) for params in gparams]
def npz2vec3_fromDirectory(dirname, filename_base, ofs, sfs, which='mean'):
    """
    Merge ``.npz`` result archives and average out their last-but-one axes.

    All files in ``dirname`` whose name starts with
    ``filename_base + ofs + '_' + sfs`` are loaded in natural filename order.
    Their ``'res_dwt_' + ofs`` arrays are concatenated along axis -3 and then
    reduced twice along axis -2.  Each archive's ``'axes_dwt_' + ofs`` entry
    must be identical across files.

    Parameters
    ----------
    dirname : str
        Directory holding the archives.
    filename_base, ofs, sfs : str
        Pieces of the filename prefix; ``ofs`` also selects the array keys.
    which : {'mean', 'median'}
        Reduction applied along the two collapsed axes.

    Returns
    -------
    df_composed : numpy.ndarray
        Reduced array.
    axesList : list
        Axis description with the entries at positions -3 and -2 removed.

    Raises
    ------
    NotImplementedError
        If ``which`` is neither ``'mean'`` nor ``'median'``.
    ValueError
        If no matching file is found.
    """
    # Validate before touching the disk so that a typo fails fast.
    if which == 'mean':
        reduce_axis = np.mean
    elif which == 'median':
        reduce_axis = np.median
    else:
        raise NotImplementedError(
            "which must be 'mean' or 'median', got %r" % (which,)
        )

    prefix = filename_base + ofs + '_' + sfs
    filenames = [name for name in os.listdir(dirname) if name.startswith(prefix)]
    if not filenames:
        raise ValueError(
            'no result files starting with %r found in %r' % (prefix, dirname)
        )
    filenames.sort(key=stringSplitByNumbers)

    chunks = []
    axesList = None
    for filename in filenames:
        # the context manager closes the archive's file handle again
        with np.load(os.path.join(dirname, filename)) as archive:
            chunks.append(archive['res_dwt_' + ofs])
            axes = archive['axes_dwt_' + ofs].tolist()
        if axesList is None:
            axesList = axes
        else:
            assert axesList == axes, 'axes description differs in %s' % (filename,)

    df_composed = np.concatenate(chunks, axis=-3)
    df_composed = reduce_axis(df_composed, axis=-2)  # eval
    df_composed = reduce_axis(df_composed, axis=-2)  # cv
    axesList = axesList[:-3] + axesList[-1:]
    return df_composed, axesList
def mosaicKeyMapper(x):
    """
    Convert one textual mosaic key component back into its Python value.

    The threshold names ``'hard'`` and ``'soft'`` are passed through
    unchanged; every other string is evaluated as a Python expression.
    """
    return x if x in ('hard', 'soft') else eval(x)
def custom_mosaic_dwt(mosaic_data, mosaic_data_3var_dict, cmapInstance):
    """
    Draw a statsmodels mosaic plot whose tiles are coloured by their value.

    Parameters
    ----------
    mosaic_data : mapping
        Passed to ``mosaic``; also looked up with the evaluated tile key to
        obtain the value that is fed to ``cmapInstance``.
    mosaic_data_3var_dict : mapping
        Translates the text of the y tick labels of the first axes into the
        labels that are finally shown.
    cmapInstance : callable
        Maps a tile value to a colour.

    Returns
    -------
    fig, rect
        The objects returned by ``mosaic``.
    """
    def tile_properties(key):
        # tile keys arrive as strings; convert them back before the lookup
        lookup = tuple(mosaicKeyMapper(part) for part in key)
        return {'color': cmapInstance(mosaic_data[lookup])}

    def empty_label(_key):
        return ''

    fig, rect = mosaic(
        mosaic_data,
        gap=0.05,
        labelizer=empty_label,
        properties=tile_properties,
    )
    fig.set_size_inches(4, 3)

    fig.axes[0].set_xlabel('Standard scaling axis')
    fig.axes[2].set_xlabel('Subtract')
    threshold_axes = fig.axes[1]
    threshold_axes.set_ylabel('Threshold')
    threshold_axes.yaxis.set_label_coords(1.05, 0.5)

    relabelled = [
        mosaic_data_3var_dict[tick.get_text()]
        for tick in fig.axes[0].get_yticklabels()
    ]
    fig.axes[0].set_yticklabels(relabelled)
    return fig, rect
Up to section $\quad$ Return to contents
data_dirname = '../data/'

## data with smaller resolution
fnms1 = [
    '08-07-2017-baselinecorrected.csv',
    '22-09-2017-baselinecorrected.csv'
]
## data with higher resolution
fnms2 = [
    '16-04-2018-baselinecorrected.csv',
    '18-04-2018-baselinecorrected.csv'
]

X1, labels1, freq = ftirLoad(fnms1, dirname=data_dirname)
X2, labels2, freq2 = ftirLoad(fnms2, dirname=data_dirname)

## v.2018 sampled more precisely; discretization points are exactly the same
# keep every second row of the 2018 data and drop the last row of the 2017
# data so that both share one row grid, then join the samples column-wise
X = np.hstack([X1[:-1, :], X2[::2, :]])
labels = np.array(labels1.tolist() + labels2.tolist(), dtype='i')

# restrict all spectra to the band below the configured limit
ind = np.where(freq <= bandLim)[0]
X, freq = X[ind], freq[ind]
Nsamples = X.shape[1]

# plain data for DWT
plainT = 0.01*X

# NOTE(review): freq[2:] - freq[:-2] already spans two grid steps, so on a
# uniform grid dT is 1/2 and d2T is 1/4 of the usual central-difference
# estimates.  The constant factors are kept unchanged so that previously
# stored results stay comparable -- confirm whether this is intended.
# first order derivative data for DWT
dT = (plainT[2:] - plainT[:-2]) / (2.*np.reshape(freq[2:] - freq[:-2], [-1, 1]))
dFreq = freq[1:-1].copy()
# second order derivative data for DWT
d2T = (plainT[2:] - 2*plainT[1:-1] + plainT[:-2]) / np.reshape(freq[2:] - freq[:-2], [-1, 1])**2.
d2Freq = freq[1:-1].copy()

# generate CV indices
separator_long = RepeatedStratifiedKFold(
    n_splits=n_splits, n_repeats=n_repeats, random_state=random_state_cv_long
)
separator_short = RepeatedStratifiedKFold(
    n_splits=n_splits, n_repeats=1, random_state=random_state_cv_short
)
# only passed to split() together with the labels; its content is never set
dummyX = np.empty(Nsamples)

train_indices_long, test_indices_long = [], []
for train_index, test_index in separator_long.split(dummyX, labels):
    train_indices_long.append(train_index)
    test_indices_long.append(test_index)

train_indices_short, test_indices_short = [], []
for train_index, test_index in separator_short.split(dummyX, labels):
    train_indices_short.append(train_index)
    test_indices_short.append(test_index)

# Materialised as a list: the folds are sliced, measured with len() and
# reused by several grid searches, which a one-shot zip iterator (Python 3)
# cannot support.
custom_cv_short = list(
    zip(train_indices_short[:n_splits], test_indices_short[:n_splits])
)
2.1 Linear discriminant analysis
Up to section $\quad$ Return to contents
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Output locations of the DWT+LDA grid search, one directory per nonlinearity.
filename_base = 'gscv_dwt_lda_'
results_dirname = '../results/dwt/lda/'
results_dirname_sign = '../results/dwt/lda_sign/'
dirnames = [results_dirname, results_dirname_sign]
for _results_path in dirnames:
    if not os.path.isdir(_results_path):
        os.makedirs(_results_path)

# nonlinearities and their display names, index-aligned with ``dirnames``
nonlins = [hos_thresh, sign_thresh]
nonlin_names = ['hard/soft threshold', 'hard/soft threshold + signum']

# Searched hyper-parameter grid.
param_grid = {
    'with_mean': [True, False],
    'with_std': [True, False],
    'with_abs': [True, False],
    'th': ['soft', 'hard'],
    'tau': np.power(10., np.linspace(-5, 1, 10)),
    'wavelet_name': [
        # biorthogonal (15)
        'bior1.1', 'bior1.3', 'bior1.5',
        'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8',
        'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9',
        'bior4.4', 'bior5.5', 'bior6.8',
        # coiflets (17)
        'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6',
        'coif7', 'coif8', 'coif9', 'coif10', 'coif11', 'coif12',
        'coif13', 'coif14', 'coif15', 'coif16', 'coif17',
        # daubechies (20)
        'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9', 'db10',
        'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17', 'db18', 'db19', 'db20',
        # reversed biorthogonal (15)
        'rbio1.1', 'rbio1.3', 'rbio1.5',
        'rbio2.2', 'rbio2.4', 'rbio2.6', 'rbio2.8',
        'rbio3.1', 'rbio3.3', 'rbio3.5', 'rbio3.7', 'rbio3.9',
        'rbio4.4', 'rbio5.5', 'rbio6.8',
        # symlets (19)
        'sym2', 'sym3', 'sym4', 'sym5', 'sym6', 'sym7', 'sym8', 'sym9', 'sym10',
        'sym11', 'sym12', 'sym13', 'sym14', 'sym15', 'sym16', 'sym17', 'sym18',
        'sym19', 'sym20'
    ],
    # 'mode': ['constant'],
    'mode': copy.deepcopy(pywt.Modes.modes),
}

# sizes of the grid axes that are reshaped into (mode, tau, wavelet) cubes
Nmodes = len(param_grid['mode'])
Ntau = len(param_grid['tau'])
Nwavs = len(param_grid['wavelet_name'])

waveletFamilyNames = ['bior', 'coif', 'db', 'rbio', 'sym']

# parameters iterated explicitly when tabulating / plotting, together with
# the number of values each one takes (same order in both lists)
loopParamNames = ['th', 'with_abs', 'with_mean', 'with_std']
parBool = [
    len(param_grid['th']),
    len(param_grid['with_abs']),
    len(param_grid['with_mean']),
    len(param_grid['with_std'])
]
# number of explicit (th, with_abs, with_mean, with_std) combinations
ntab = np.prod(parBool)
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# suppress nasty sklearn warnings by replacing warnings.warn with a no-op
# thanks to https://stackoverflow.com/questions/32612180/eliminating-warnings-from-scikit-learn
# NOTE(review): this silences every warning raised through warnings.warn in
# this process, not only those of sklearn.
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn


def _run_gscv_dwt_lda(data, tag, description, fold, fold_cv, nonlin, out_dirname):
    """
    Grid-search ``dwtLDA`` on one data variant for a single CV fold.

    ``data`` is transposed before fitting.  The resulting ``cv_results_``
    table is written to ``<out_dirname><filename_base><tag>_<fold>.csv``.
    Returns the fitted search object and the table.
    """
    gscv = GridSearchCV(
        estimator=dwtLDA(nonlin=nonlin), param_grid=param_grid, cv=fold_cv,
        n_jobs=nJobsGSCV, verbose=True,
        return_train_score=True, scoring='accuracy'
    )
    gscv.fit(data.T, labels)
    # pandas 0.19 tries to make sorting of dict.keys()
    results = pd.DataFrame(gscv.cv_results_)
    results.to_csv(out_dirname + filename_base + '%s_%d.csv' % (tag, fold), index=False)
    print('Results are ready for %s data' % (description,))
    return gscv, results


# a real list, so the folds can be counted and sliced one at a time
_lda_cv_folds = list(custom_cv_short)

for i_dnm in range(len(dirnames)):
    nonlin = nonlins[i_dnm]
    current_dirname = dirnames[i_dnm]
    print('============= %s ===========' % (nonlin_names[i_dnm]))
    # one search (and one CSV file) per fold and data variant
    for k in range(len(_lda_cv_folds)):
        gscv_dwt_lda_plain, df_dwt_lda_plain = _run_gscv_dwt_lda(
            plainT, 'plain', 'plain', k, _lda_cv_folds[k:k+1], nonlin, current_dirname
        )
        gscv_dwt_lda_d1, df_dwt_lda_d1 = _run_gscv_dwt_lda(
            dT, 'd1', '1st derivative', k, _lda_cv_folds[k:k+1], nonlin, current_dirname
        )
        gscv_dwt_lda_d2, df_dwt_lda_d2 = _run_gscv_dwt_lda(
            d2T, 'd2', '2nd derivative', k, _lda_cv_folds[k:k+1], nonlin, current_dirname
        )
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# For every nonlinearity and every (th, with_abs, with_mean, with_std)
# combination, print the train/test accuracy of the grid point with the best
# test accuracy, separately for the plain, 1st- and 2nd-derivative data.
_LDA_TABLE_ROWS = (
    ('plain', 'original (%s [%s], tau=%.2e)'),
    ('d1', '1st deriv. (%s [%s], tau=%.2e)'),
    ('d2', '2nd deriv. (%s [%s], tau=%.2e)'),
)

for i_dnm in range(len(dirnames)):
    nonlin = nonlins[i_dnm]
    current_dirname = dirnames[i_dnm]
    print('============= %s ===========' % (nonlin_names[i_dnm]))

    # The stored results do not depend on the configuration loop below, so
    # read them once per directory instead of once per configuration.
    _lda_table_scores = dict(
        (suffix, df2vec3_fromDirectory(current_dirname, filename_base + suffix, which='mean'))
        for suffix, _label in _LDA_TABLE_ROWS
    )

    for k in range(ntab):
        index = np.unravel_index(k, parBool, order='C')
        current_param_values = [
            param_grid[name][index[i]] for i, name in enumerate(loopParamNames)
        ]
        th, with_abs, with_mean, with_std = current_param_values

        df_out = {'train': {}, 'test': {}}
        for suffix, label_format in _LDA_TABLE_ROWS:
            train_score, test_score, gparams = _lda_table_scores[suffix]
            indices = get_indices_lda_dwt(gparams, th, with_abs, with_mean, with_std)
            train_cube = np.reshape(train_score[indices], [Nmodes, Ntau, Nwavs])  # C order
            test_cube = np.reshape(test_score[indices], [Nmodes, Ntau, Nwavs])  # C order

            # first grid point (in C order) with the highest test score;
            # nanargmax skips NaN scores instead of failing on them
            best = np.unravel_index(np.nanargmax(test_cube), test_cube.shape)
            label = label_format % (
                param_grid['wavelet_name'][best[2]],
                param_grid['mode'][best[0]],
                param_grid['tau'][best[1]]
            )
            df_out['train'][label] = train_cube[best]
            df_out['test'][label] = test_cube[best]

        config_string = '%s thresholding; with_abs=%d, with_mean=%d, with_std=%d' % (
            th, with_abs, with_mean, with_std
        )
        df_out = pd.DataFrame(df_out)
        print(config_string)
        print(df_out)
        print('')
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Train-vs-test accuracy scatter of the best DWT+LDA grid point of every
# (th, with_abs, with_mean, with_std) combination; one panel per results
# directory, i.e. per nonlinearity.
# NOTE(review): the indentation of this cell was lost in the export. The loop
# nesting must be restored before running; in particular confirm whether the
# legend() and show() calls at the end belong inside the outer loop.
f, ax = plt.subplots(1, 2, figsize=(12, 4))
sns.set_style('darkgrid')
for i_dnm in xrange(len(dirnames)):
current_dirname = dirnames[i_dnm]
for k in xrange(ntab):
# decode the flat counter k into one value per looped parameter
index = np.unravel_index(k, parBool, order='C')
current_param_values = [param_grid[loopParamNames[i]][index[i]] for i in xrange(len(loopParamNames))]
th, with_abs, with_mean, with_std = current_param_values
#train_score, test_score, gparams = df2vec3(df_dwt_lda_plain)
# stored scores of the plain data, restricted to the current combination
train_score, test_score, gparams = df2vec3_fromDirectory(
current_dirname, filename_base+'plain', which='mean'
)
indices = get_indices_lda_dwt(gparams, th, with_abs, with_mean, with_std) #####################
train_dwt_lda_plain, test_dwt_lda_plain = train_score[indices], test_score[indices]
#train_score, test_score, gparams = df2vec3(df_dwt_lda_d1)
# same for the first-derivative data
train_score, test_score, gparams = df2vec3_fromDirectory(
current_dirname, filename_base+'d1', which='mean'
)
indices = get_indices_lda_dwt(gparams, th, with_abs, with_mean, with_std) ############
train_dwt_lda_d1, test_dwt_lda_d1 = train_score[indices], test_score[indices]
#train_score, test_score, gparams = df2vec3(df_dwt_lda_d2)
# same for the second-derivative data
train_score, test_score, gparams = df2vec3_fromDirectory(
current_dirname, filename_base+'d2', which='mean'
)
indices = get_indices_lda_dwt(gparams, th, with_abs, with_mean, with_std) ###################
train_dwt_lda_d2, test_dwt_lda_d2 = train_score[indices], test_score[indices]
# arrange the remaining grid points as a (mode, tau, wavelet) cube
train_dwt_lda_plain = np.reshape(train_dwt_lda_plain, [Nmodes, Ntau, Nwavs]) # C order
test_dwt_lda_plain = np.reshape(test_dwt_lda_plain, [Nmodes, Ntau, Nwavs]) # C order
train_dwt_lda_d1 = np.reshape(train_dwt_lda_d1, [Nmodes, Ntau, Nwavs]) # C order
test_dwt_lda_d1 = np.reshape(test_dwt_lda_d1, [Nmodes, Ntau, Nwavs]) # C order
train_dwt_lda_d2 = np.reshape(train_dwt_lda_d2, [Nmodes, Ntau, Nwavs]) # C order
test_dwt_lda_d2 = np.reshape(test_dwt_lda_d2, [Nmodes, Ntau, Nwavs]) # C order
# index of the first cube entry that attains the maximal test score
ind1 = np.where(test_dwt_lda_plain == test_dwt_lda_plain.max())
ind1 = tuple(map(lambda x: x[0], ind1))
ind2 = np.where(test_dwt_lda_d1 == test_dwt_lda_d1.max())
ind2 = tuple(map(lambda x: x[0], ind2))
ind3 = np.where(test_dwt_lda_d2 == test_dwt_lda_d2.max())
ind3 = tuple(map(lambda x: x[0], ind3))
# colour slot: k modulo the 8 available colours
colour_ind = k%8
# 'hard' thresholding is drawn with filled scatter markers, every other
# threshold with hollow markers
if th == 'hard':
ax[i_dnm].scatter(
train_dwt_lda_plain[ind1], test_dwt_lda_plain[ind1], marker=markers_trte['original'],
color=colours_trte[colour_ind], alpha=0.6,
linewidth=0.5
)
ax[i_dnm].scatter(
train_dwt_lda_d1[ind2], test_dwt_lda_d1[ind2], marker=markers_trte['d1'],
color=colours_trte[colour_ind], alpha=0.6,
linewidth=0.5
)
ax[i_dnm].scatter(
train_dwt_lda_d2[ind3], test_dwt_lda_d2[ind3], marker=markers_trte['d2'],
color=colours_trte[colour_ind], alpha=0.6,
linewidth=0.5
)
else:
ax[i_dnm].plot(
train_dwt_lda_plain[ind1], test_dwt_lda_plain[ind1], marker=markers_trte['original'],
alpha=0.9, markerfacecolor='None', markeredgewidth=1, markeredgecolor=colours_trte[colour_ind],
linewidth=0.5, markersize=7
)
ax[i_dnm].plot(
train_dwt_lda_d1[ind2], test_dwt_lda_d1[ind2], marker=markers_trte['d1'],
alpha=0.9, markerfacecolor='None', markeredgewidth=1, markeredgecolor=colours_trte[colour_ind],
linewidth=0.5, markersize=7
)
ax[i_dnm].plot(
train_dwt_lda_d2[ind3], test_dwt_lda_d2[ind3], marker=markers_trte['d2'],
alpha=0.9, markerfacecolor='None', markeredgewidth=1, markeredgecolor=colours_trte[colour_ind],
linewidth=0.5, markersize=7
)
#break
# axis limits, labels, dashed train == test diagonal and panel title
ax[i_dnm].set_xlim([0.6, 1.])
ax[i_dnm].set_ylim([0.25, 1.])
ax[i_dnm].set_xlabel('Accuracy (train)')
ax[i_dnm].set_ylabel('Accuracy (test)')
ax[i_dnm].plot([0., 1.], [0., 1.], ls="--", c=".3")
ax[i_dnm].set_title(nonlin_names[i_dnm])
# legend anchored to the right of the axes it is attached to
ax[i_dnm].legend(
handles=legend_elements_trte, facecolor='white',
edgecolor='black', fancybox=True, framealpha=1, frameon=True,
ncol=1, bbox_to_anchor=(1.1, 1.)
)
plt.show()
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Bar plots of the best train/test accuracy per wavelet, one figure per
# wavelet family and nonlinearity, for one fixed preprocessing configuration.
th = 'hard'
with_abs = 0
with_mean = 1
with_std = 1
print('current config: th=%s, with_abs=%d, with_mean=%d, with_std=%d' % (
    th, with_abs, with_mean, with_std
))

_LDA_FAMILY_VARIANTS = ('plain', 'd1', 'd2')


def _best_row_per_column(train_scores, test_scores):
    """Per column, return (train, test) score at the row with the best test score."""
    best_rows = np.argmax(test_scores, axis=0)
    columns = np.arange(test_scores.shape[1])
    return train_scores[best_rows, columns], test_scores[best_rows, columns]


# The stored results depend neither on the wavelet family nor on the
# configuration, so read them once per directory instead of once per figure.
_lda_family_scores = [
    dict(
        (variant, df2vec3_fromDirectory(current_dirname, filename_base + variant, which='mean'))
        for variant in _LDA_FAMILY_VARIANTS
    )
    for current_dirname in dirnames
]

barwidth = 0.2
for currentWaveletFamilyName in waveletFamilyNames:
    print('')
    print(currentWaveletFamilyName)
    # a real list (not a lazy filter object): it is measured and reused below
    currentWaveletNames = [
        name for name in param_grid['wavelet_name']
        if name.startswith(currentWaveletFamilyName)
    ]
    x = np.arange(len(currentWaveletNames))

    for i_dnm in range(len(dirnames)):
        f, ax = plt.subplots(figsize=(1.35*len(currentWaveletNames), 2))
        legend_handles = []
        for slot, variant in enumerate(_LDA_FAMILY_VARIANTS):
            train_score, test_score, gparams = _lda_family_scores[i_dnm][variant]
            indices = get_indices_lda_dwt(
                gparams, th, with_abs, with_mean, with_std,
                wavelet_family=currentWaveletFamilyName
            )
            # rows: all (mode, tau) pairs, columns: wavelets of the family
            acc_train, acc_test = _best_row_per_column(
                np.reshape(train_score[indices], [Nmodes*Ntau, -1]),  # C order
                np.reshape(test_score[indices], [Nmodes*Ntau, -1])  # C order
            )
            # test accuracy as the solid bar, train-test gap stacked on top
            test_bar = ax.bar(
                x + slot*barwidth, acc_test, width=barwidth,
                color=barplot_palette[2*slot], alpha=0.8
            )
            train_bar = ax.bar(
                x + slot*barwidth, acc_train - acc_test, bottom=acc_test, width=barwidth,
                color=barplot_palette[2*slot + 1], alpha=0.3
            )
            legend_handles.extend([train_bar[0], test_bar[0]])

        ax.set_ylim(0., 1.)
        ax.set_xticks(x + barwidth)
        ax.set_xticklabels(list(currentWaveletNames))
        ax.legend(
            tuple(legend_handles),
            ('train (f)', 'test (f)', "train (f')", "test (f')", 'train (f")', 'test (f")'),
            loc='upper right',
            bbox_to_anchor=(1.08, 1.)
        )
        ax.yaxis.grid(True)
        ax.set_title(nonlin_names[i_dnm])
        plt.show()
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Bar plots of the best train/test accuracy per signal-extension mode, one
# figure per wavelet and nonlinearity, for one fixed preprocessing
# configuration.
th = 'hard'
with_abs = 0
with_mean = 1
with_std = 1

_LDA_MODE_VARIANTS = ('plain', 'd1', 'd2')


def _best_tau_per_mode(train_cube, test_cube, wavelet_index):
    """
    For one wavelet, return (train, test) accuracy of every mode at the tau
    with the highest test accuracy.  Cubes are indexed (mode, tau, wavelet).
    """
    test_plane = test_cube[:, :, wavelet_index]
    # axis=1 searches along tau and yields one tau index per mode.  The
    # previous axis=0 produced a mode index per tau, which was then misused
    # as a tau index, so the bars did not show the per-mode optimum.
    best_tau = np.argmax(test_plane, axis=1)
    mode_rows = np.arange(test_plane.shape[0])
    return (
        train_cube[mode_rows, best_tau, wavelet_index],
        test_plane[mode_rows, best_tau]
    )


# The stored results depend neither on the wavelet nor on the configuration,
# so read them once per directory instead of once per wavelet family.
_lda_mode_scores = [
    dict(
        (variant, df2vec3_fromDirectory(current_dirname, filename_base + variant, which='mean'))
        for variant in _LDA_MODE_VARIANTS
    )
    for current_dirname in dirnames
]

# shortened tick label for the longest mode name
localModeNames = [
    'periodiz. ' if mode_name == 'periodization' else mode_name
    for mode_name in param_grid['mode']
]
barwidth = 0.2

for currentWaveletFamilyName in waveletFamilyNames:
    currentWaveletNames = [
        name for name in param_grid['wavelet_name']
        if name.startswith(currentWaveletFamilyName)
    ]
    print('\t\t ======= %s =======' % (currentWaveletFamilyName))

    # (train, test) cubes of the current family, per directory and variant
    family_cubes = []
    for directory_scores in _lda_mode_scores:
        cubes = {}
        for variant in _LDA_MODE_VARIANTS:
            train_score, test_score, gparams = directory_scores[variant]
            indices = get_indices_lda_dwt(
                gparams, th, with_abs, with_mean, with_std,
                wavelet_family=currentWaveletFamilyName
            )
            cubes[variant] = (
                np.reshape(train_score[indices], [Nmodes, Ntau, -1]),
                np.reshape(test_score[indices], [Nmodes, Ntau, -1])
            )
        family_cubes.append(cubes)

    for i_wn, currentWaveletName in enumerate(currentWaveletNames):
        print('\t ======= %s' % (currentWaveletName))
        for i_dnm in range(len(dirnames)):
            f, ax = plt.subplots(figsize=(1.35*Nmodes, 2))
            x = np.arange(Nmodes)
            legend_handles = []
            for slot, variant in enumerate(_LDA_MODE_VARIANTS):
                train_cube, test_cube = family_cubes[i_dnm][variant]
                acc_train, acc_test = _best_tau_per_mode(train_cube, test_cube, i_wn)
                # test accuracy as the solid bar, train-test gap stacked on top
                test_bar = ax.bar(
                    x + slot*barwidth, acc_test, width=barwidth,
                    color=barplot_palette[2*slot], alpha=0.8
                )
                train_bar = ax.bar(
                    x + slot*barwidth, acc_train - acc_test, bottom=acc_test, width=barwidth,
                    color=barplot_palette[2*slot + 1], alpha=0.3
                )
                legend_handles.extend([train_bar[0], test_bar[0]])

            ax.set_ylim(0., 1.)
            ax.set_xticks(x + barwidth)
            ax.set_xticklabels(localModeNames)
            ax.legend(
                tuple(legend_handles),
                ('train (f)', 'test (f)', "train (f')", "test (f')", 'train (f")', 'test (f")'),
                loc='upper right',
                bbox_to_anchor=(1.2, 1.)
            )
            ax.set_title(nonlin_names[i_dnm])
            ax.yaxis.grid(True)
            plt.show()
Up to section $\quad$ Return to contents
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Output location of the DWT + logistic-regression grid search.
filename_base = 'gscv_dwt_lr_'
results_dirname = '../results/dwt/lr/'
if not os.path.isdir(results_dirname):
    os.makedirs(results_dirname)

# Searched hyper-parameter grid.
param_grid = {}
param_grid['C'] = np.power(10., np.arange(-5, 6))
param_grid['penalty'] = ['l1', 'l2']
param_grid['with_mean'] = [True, False]
param_grid['with_std'] = [True, False]
param_grid['with_abs'] = [True, False]
param_grid['th'] = ['soft', 'hard']
param_grid['tau'] = np.power(10., np.linspace(-5, 1, 10))
param_grid['wavelet_name'] = [
    # biorthogonal (15)
    'bior1.1', 'bior1.3', 'bior1.5', 'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8',
    'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9', 'bior4.4', 'bior5.5',
    'bior6.8',
    # coiflets (17)
    'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6', 'coif7',
    'coif8', 'coif9', 'coif10', 'coif11', 'coif12', 'coif13', 'coif14', 'coif15',
    'coif16', 'coif17',
    # daubechies (20)
    'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8',
    'db9', 'db10', 'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17',
    'db18', 'db19', 'db20',
    # reversed biorthogonal (15)
    'rbio1.1', 'rbio1.3', 'rbio1.5',
    'rbio2.2', 'rbio2.4', 'rbio2.6', 'rbio2.8', 'rbio3.1', 'rbio3.3', 'rbio3.5',
    'rbio3.7', 'rbio3.9', 'rbio4.4', 'rbio5.5', 'rbio6.8',
    # symlets (19)
    'sym2', 'sym3',
    'sym4', 'sym5', 'sym6', 'sym7', 'sym8', 'sym9', 'sym10', 'sym11', 'sym12',
    'sym13', 'sym14', 'sym15', 'sym16', 'sym17', 'sym18', 'sym19', 'sym20'
]
# param_grid['mode'] = ['constant']
# Deep copy, as in the LDA set-up, so the grid never aliases (and cannot
# accidentally modify) the list owned by pywt.
param_grid['mode'] = copy.deepcopy(pywt.Modes.modes)

# sizes of the individual grid axes
Nmodes = len(param_grid['mode'])
Ntau = len(param_grid['tau'])
Nwavs = len(param_grid['wavelet_name'])
Nc = len(param_grid['C'])
Npenalty = len(param_grid['penalty'])

# parameters iterated explicitly when tabulating / plotting, together with
# the number of values each one takes (same order in both lists)
parBool = [
    len(param_grid['th']),
    len(param_grid['with_abs']),
    len(param_grid['with_mean']),
    len(param_grid['with_std'])
]
loopParamNames = [
    'th',
    'with_abs',
    'with_mean',
    'with_std'
]
# number of explicit (th, with_abs, with_mean, with_std) combinations
ntab = np.prod(parBool)

waveletFamilyNames = [
    'bior', 'coif', 'db', 'rbio', 'sym'
]
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# heavy computations; results ~ 1 Gb for each csv
# Keyword arguments shared by the three searches (original signal,
# 1st derivative, 2nd derivative); each search gets its own estimator.
_gscv_lr_settings = dict(
    param_grid=param_grid, cv=custom_cv_short, n_jobs=nJobsGSCV, verbose=True,
    return_train_score=True, scoring='accuracy'
)
gscv_dwt_lr_plain = GridSearchCV(
    estimator=dwtLR(solver='saga', multi_class='ovr', nonlin=sign_thresh),
    **_gscv_lr_settings
)
gscv_dwt_lr_d1 = GridSearchCV(
    estimator=dwtLR(solver='saga', multi_class='ovr', nonlin=sign_thresh),
    **_gscv_lr_settings
)
gscv_dwt_lr_d2 = GridSearchCV(
    estimator=dwtLR(solver='saga', multi_class='ovr', nonlin=sign_thresh),
    **_gscv_lr_settings
)

# Fit each search in turn and dump its full cv_results_ table to csv, so the
# analysis cells can reload the scores without repeating the search.
gscv_dwt_lr_plain.fit(plainT.T, labels)
df_dwt_lr_plain = pd.DataFrame(gscv_dwt_lr_plain.cv_results_)
df_dwt_lr_plain.to_csv(results_dirname + filename_base + 'plain.csv', index=False)
print('Results are ready for plain data')

gscv_dwt_lr_d1.fit(dT.T, labels)
df_dwt_lr_d1 = pd.DataFrame(gscv_dwt_lr_d1.cv_results_)
df_dwt_lr_d1.to_csv(results_dirname + filename_base + 'd1.csv', index=False)
print('Results are ready for 1st derivative data')

gscv_dwt_lr_d2.fit(d2T.T, labels)
df_dwt_lr_d2 = pd.DataFrame(gscv_dwt_lr_d2.cv_results_)
df_dwt_lr_d2.to_csv(results_dirname + filename_base + 'd2.csv', index=False)
print('Results are ready for 2nd derivative data')
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Reload the stored grid-search tables and print, for every combination of the
# looped parameters (th, with_abs, with_mean, with_std), the best test score
# and the matching train score for the original signal and both derivatives.
df_dwt_lr_plain = pd.read_csv(results_dirname + filename_base + 'plain.csv')
df_dwt_lr_d1 = pd.read_csv(results_dirname + filename_base + 'd1.csv')
df_dwt_lr_d2 = pd.read_csv(results_dirname + filename_base + 'd2.csv')
print("starting...")


def _lr_table_cubes(table, th, with_abs, with_mean, with_std):
    """Return (train, test) scores of `table` for one looped-parameter
    combination, reshaped to (C, mode, penalty, tau, wavelet) in C order."""
    train_score, test_score, gparams = df2vec3(table)
    keep = get_indices_lr_dwt(gparams, th, with_abs, with_mean, with_std)
    cube_shape = [Nc, Nmodes, Npenalty, Ntau, Nwavs]
    return (
        np.reshape(train_score[keep], cube_shape),
        np.reshape(test_score[keep], cube_shape),
    )


def _lr_table_first_max(cube):
    """Index tuple of the first (C-order) entry equal to the maximum of `cube`."""
    hits = np.where(cube == cube.max())
    return tuple(axis_hits[0] for axis_hits in hits)


def _lr_table_label(template, best):
    """Fill a row-label template with the parameter values at index `best`."""
    return template % (
        param_grid['wavelet_name'][best[4]],
        param_grid['mode'][best[1]],
        param_grid['penalty'][best[2]],
        param_grid['C'][best[0]],
        param_grid['tau'][best[3]],
    )


for k in range(ntab):
    index = np.unravel_index(k, parBool, order='C')
    th, with_abs, with_mean, with_std = [
        param_grid[loop_name][loop_pos]
        for loop_name, loop_pos in zip(loopParamNames, index)
    ]

    labelled_tables = [
        ('original (%s %s [%s, C=%.1e, tau=%.2e])', df_dwt_lr_plain),
        ('1st deriv. (%s %s [%s, C=%.1e, tau=%.2e])', df_dwt_lr_d1),
        ('2nd deriv. (%s %s [%s, C=%.1e, tau=%.2e])', df_dwt_lr_d2),
    ]
    train_column = {}
    test_column = {}
    for row_template, score_table in labelled_tables:
        train_cube, test_cube = _lr_table_cubes(
            score_table, th, with_abs, with_mean, with_std
        )
        # the winner is chosen on the test score; train is reported alongside
        best = _lr_table_first_max(test_cube)
        row_label = _lr_table_label(row_template, best)
        train_column[row_label] = train_cube[best]
        test_column[row_label] = test_cube[best]

    config_string = '%s thresholding, with_abs=%d, with_mean=%d, with_std=%d' % (
        th, with_abs, with_mean, with_std
    )
    df_out = pd.DataFrame({'train': train_column, 'test': test_column})
    print(config_string)
    print(df_out)
    print('')
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Train-vs-test accuracy scatter: one marker per feature space (original, 1st
# and 2nd derivative) and per combination of the looped parameters, taken at
# the grid point with the best test score.
df_dwt_lr_plain = pd.read_csv(results_dirname + filename_base + 'plain.csv')
df_dwt_lr_d1 = pd.read_csv(results_dirname + filename_base + 'd1.csv')
df_dwt_lr_d2 = pd.read_csv(results_dirname + filename_base + 'd2.csv')
sns.set_style('darkgrid')
f, ax = plt.subplots(1, 1, figsize=(5, 4))

# Previously cached (train, test) coordinates. To rebuild the cache, start
# from `points = []`, append the three (train, test) pairs of every iteration
# and finish with
#   np.savez_compressed(results_dirname+'train-test_points_lr_swap', points=points)
df = np.load(results_dirname + 'train-test_points_lr_swap.npz')
points = df['points']


def _lr_scatter_best(table, th, with_abs, with_mean, with_std):
    """Return (train, test) accuracy of `table` at the best-test grid point
    for one looped-parameter combination."""
    train_score, test_score, gparams = df2vec3(table)
    keep = get_indices_lr_dwt(gparams, th, with_abs, with_mean, with_std)
    cube_shape = [Nc, Nmodes, Npenalty, Ntau, Nwavs]  # C order
    train_cube = np.reshape(train_score[keep], cube_shape)
    test_cube = np.reshape(test_score[keep], cube_shape)
    hits = np.where(test_cube == test_cube.max())
    best = tuple(axis_hits[0] for axis_hits in hits)
    return train_cube[best], test_cube[best]


for k in range(ntab):
    index = np.unravel_index(k, parBool, order='C')
    th, with_abs, with_mean, with_std = [
        param_grid[loop_name][loop_pos]
        for loop_name, loop_pos in zip(loopParamNames, index)
    ]

    best_plain = _lr_scatter_best(df_dwt_lr_plain, th, with_abs, with_mean, with_std)
    best_d1 = _lr_scatter_best(df_dwt_lr_d1, th, with_abs, with_mean, with_std)
    best_d2 = _lr_scatter_best(df_dwt_lr_d2, th, with_abs, with_mean, with_std)

    # cached counterpart of the three points above; currently not plotted
    points_local = points[k]
    colour_ind = k % 8  # one colour per with_abs/with_mean/with_std combination

    for marker_key, (train_acc, test_acc) in (
        ('original', best_plain),
        ('d1', best_d1),
        ('d2', best_d2),
    ):
        if th == 'hard':
            # hard thresholding: filled markers
            ax.scatter(
                train_acc, test_acc, marker=markers_trte[marker_key],
                color=colours_trte[colour_ind], alpha=0.6,
                linewidth=0.5
            )
        else:
            # soft thresholding: hollow markers
            ax.plot(
                train_acc, test_acc, marker=markers_trte[marker_key],
                alpha=0.9, markerfacecolor='None', markeredgewidth=1,
                markeredgecolor=colours_trte[colour_ind],
                linewidth=0.5, markersize=7
            )

ax.set_xlim([0.75, 1.01])
ax.set_ylim([0.35, 1.01])
ax.set_xlabel('Accuracy (train)')
ax.set_ylabel('Accuracy (test)')
# diagonal train == test for reference
ax.plot([0., 1.], [0., 1.], ls="--", c=".3")
ax.legend(
    handles=legend_elements_trte, loc='best', facecolor='white',
    edgecolor='black', fancybox=True, framealpha=1, frameon=True,
    ncol=1, bbox_to_anchor=(1.1, 1.)
)
plt.show()
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Bar plot per wavelet family: for every wavelet, fix the extension mode that
# is best for the original signal, then show the best test accuracy (and the
# train accuracy at the same grid point) for f, f' and f" with that mode.
th = 'hard'
with_abs = 0
with_mean = 0
with_std = 1
df_dwt_lr_plain = pd.read_csv(results_dirname + filename_base + 'plain.csv')
df_dwt_lr_d1 = pd.read_csv(results_dirname + filename_base + 'd1.csv')
df_dwt_lr_d2 = pd.read_csv(results_dirname + filename_base + 'd2.csv')

# shortened tick labels for the long mode names
_lr_mode_abbrev = {
    'periodization': 'periodiz. ',
    'symmetric': 'symmet. ',
    'constant': 'const. ',
}

for i_wf in range(len(waveletFamilyNames)):
    currentWaveletFamilyName = waveletFamilyNames[i_wf]
    currentWaveletNames = [
        wav_name for wav_name in param_grid['wavelet_name']
        if wav_name.startswith(currentWaveletFamilyName)
    ]

    # Score cubes with axes (C, mode, penalty, tau, wavelet), C order, for the
    # three feature spaces: original signal, 1st and 2nd derivative.
    train_cubes = []
    test_cubes = []
    for score_table in (df_dwt_lr_plain, df_dwt_lr_d1, df_dwt_lr_d2):
        train_score, test_score, gparams = df2vec3(score_table)
        indices = get_indices_lr_dwt(
            gparams, th, with_abs, with_mean, with_std,
            wavelet_family=currentWaveletFamilyName
        )
        train_cubes.append(
            np.reshape(train_score[indices], [Nc, Nmodes, Npenalty, Ntau, -1])
        )
        test_cubes.append(
            np.reshape(test_score[indices], [Nc, Nmodes, Npenalty, Ntau, -1])
        )
    train_dwt_stacked = np.stack(train_cubes, axis=0)
    test_dwt_stacked = np.stack(test_cubes, axis=0)
    n_wav = test_dwt_stacked.shape[-1]
    wav_idx = np.arange(n_wav)

    # original signal; to obtain same modes for f, f', f"
    flat_plain = np.reshape(
        test_dwt_stacked[0], [Nc * Nmodes * Npenalty * Ntau, n_wav]
    )
    ind = np.argmax(flat_plain, axis=0)
    ind = np.unravel_index(ind, [Nc, Nmodes, Npenalty, Ntau], order='C')
    best_mode = ind[1]  # one mode index per wavelet
    localModeNames = [
        _lr_mode_abbrev.get(param_grid['mode'][m], param_grid['mode'][m])
        for m in best_mode
    ]

    # Pair wavelet i with its own mode best_mode[i]. Indexing the mode axis
    # alone would return every (mode choice, wavelet) combination, i.e. an
    # (..., n_wav, ..., n_wav) array that the reshape below would scramble.
    # With both index arrays given, NumPy puts the paired axis first:
    # (n_wav, 3, C, penalty, tau) -> moved back to (3, C, penalty, tau, n_wav).
    train_dwt_stacked = np.transpose(
        train_dwt_stacked[:, :, best_mode, :, :, wav_idx], [1, 2, 3, 4, 0]
    )
    test_dwt_stacked = np.transpose(
        test_dwt_stacked[:, :, best_mode, :, :, wav_idx], [1, 2, 3, 4, 0]
    )
    train_dwt_stacked = np.reshape(train_dwt_stacked, [3, Nc * Npenalty * Ntau, n_wav])
    test_dwt_stacked = np.reshape(test_dwt_stacked, [3, Nc * Npenalty * Ntau, n_wav])

    f, ax = plt.subplots(figsize=(1.35 * 10, 2))
    barwidth = 0.2
    x = np.arange(len(currentWaveletNames))
    test_bars = []
    gap_bars = []
    for k_fs in range(3):
        # best remaining (C, penalty, tau) setting per wavelet, chosen on test
        best_row = np.argmax(test_dwt_stacked[k_fs], axis=0)
        acc_train = train_dwt_stacked[k_fs, best_row, wav_idx]
        acc_test = test_dwt_stacked[k_fs, best_row, wav_idx]
        test_bars.append(ax.bar(
            x + k_fs * barwidth, acc_test, width=barwidth,
            color=barplot_palette[2 * k_fs], alpha=0.8
        ))
        # train accuracy is drawn as the gap stacked on top of the test bar
        gap_bars.append(ax.bar(
            x + k_fs * barwidth, acc_train - acc_test, bottom=acc_test,
            width=barwidth, color=barplot_palette[2 * k_fs + 1], alpha=0.3
        ))

    xticks = [
        wav_name + '\n ' + mode_name + ' '
        for wav_name, mode_name in zip(currentWaveletNames, localModeNames)
    ]
    ax.set_ylim(0.775, 1.)
    ax.set_xticks(x + barwidth)
    ax.set_xticklabels(xticks)
    ax.legend(
        (gap_bars[0][0], test_bars[0][0], gap_bars[1][0], test_bars[1][0],
         gap_bars[2][0], test_bars[2][0]),
        ('train (f)', 'test (f)', "train (f')", "test (f')", 'train (f")', 'test (f")'),
        loc='upper right',
        bbox_to_anchor=(1.08, 1.)
    )
    ax.yaxis.grid(True)
    plt.show()
Up to subsection $\quad$ Up to section $\quad$ Return to contents
# Bar plot per wavelet: for every extension mode, the best test accuracy (and
# the train accuracy at the same grid point) for f, f' and f".
th = 'hard'
with_abs = 0
with_mean = 0
with_std = 1
# Load into the df_dwt_lr_* names that the loop below actually reads. Binding
# the LR tables to df_dwt_lda_* left this cell working on stale data and
# overwrote the LDA tables of the same name.
df_dwt_lr_plain = pd.read_csv(results_dirname + filename_base + 'plain.csv')
df_dwt_lr_d1 = pd.read_csv(results_dirname + filename_base + 'd1.csv')
df_dwt_lr_d2 = pd.read_csv(results_dirname + filename_base + 'd2.csv')

for i_wf in range(len(waveletFamilyNames)):
    currentWaveletFamilyName = waveletFamilyNames[i_wf]
    currentWaveletNames = [
        wav_name for wav_name in param_grid['wavelet_name']
        if wav_name.startswith(currentWaveletFamilyName)
    ]
    print('\t\t ======= %s =======' % (currentWaveletFamilyName))

    # Score cubes with axes (C, mode, penalty, tau, wavelet), C order, for the
    # three feature spaces: original signal, 1st and 2nd derivative.
    train_cubes = []
    test_cubes = []
    for score_table in (df_dwt_lr_plain, df_dwt_lr_d1, df_dwt_lr_d2):
        train_score, test_score, gparams = df2vec3(score_table)
        indices = get_indices_lr_dwt(
            gparams, th, with_abs, with_mean, with_std,
            wavelet_family=currentWaveletFamilyName
        )
        train_cubes.append(
            np.reshape(train_score[indices], [Nc, Nmodes, Npenalty, Ntau, -1])
        )
        test_cubes.append(
            np.reshape(test_score[indices], [Nc, Nmodes, Npenalty, Ntau, -1])
        )

    test_dwt_lr_stacked = np.stack(test_cubes, axis=0)
    # axes: (feature space, C, mode, penalty, tau, wavelet)
    #    -> (feature space, C, penalty, tau, mode, wavelet)
    test_dwt_lr_stacked = np.transpose(test_dwt_lr_stacked, [0, 1, 3, 4, 2, 5])
    # collapse (C, penalty, tau) so argmax can search them jointly per mode
    test_dwt_lr_stacked = np.reshape(
        test_dwt_lr_stacked, [3, Nc * Npenalty * Ntau, Nmodes, -1]
    )

    mode_idx = np.arange(Nmodes)
    for i_wn in range(len(currentWaveletNames)):
        currentWaveletName = currentWaveletNames[i_wn]
        print('\t ======= %s' % (currentWaveletName))
        f, ax = plt.subplots(figsize=(1.35 * Nmodes, 3))
        barwidth = 0.2
        x = np.arange(Nmodes)
        test_bars = []
        gap_bars = []
        for k_fs in range(3):
            # best (C, penalty, tau) per mode, chosen on the test score
            ind = np.argmax(test_dwt_lr_stacked[k_fs, :, :, i_wn], axis=0)
            ind = np.unravel_index(ind, [Nc, Npenalty, Ntau])  # C order
            acc_train = train_cubes[k_fs][ind[0], mode_idx, ind[1], ind[2], i_wn]
            acc_test = test_cubes[k_fs][ind[0], mode_idx, ind[1], ind[2], i_wn]
            test_bars.append(ax.bar(
                x + k_fs * barwidth, acc_test, width=barwidth,
                color=barplot_palette[2 * k_fs], alpha=0.8
            ))
            # train accuracy is drawn as the gap stacked on top of the test
            # bar; same translucency for all three feature spaces (it used to
            # be 0.8 for f and f' but 0.3 for f")
            gap_bars.append(ax.bar(
                x + k_fs * barwidth, acc_train - acc_test, bottom=acc_test,
                width=barwidth, color=barplot_palette[2 * k_fs + 1], alpha=0.3
            ))

        localModeNames = [
            'periodiz. ' if mode_name == 'periodization' else mode_name
            for mode_name in param_grid['mode']
        ]
        ax.set_ylim(0.775, 1.)
        ax.set_xticks(x + barwidth)
        ax.set_xticklabels(localModeNames)
        ax.legend(
            (gap_bars[0][0], test_bars[0][0], gap_bars[1][0], test_bars[1][0],
             gap_bars[2][0], test_bars[2][0]),
            ('train (f)', 'test (f)', "train (f')", "test (f')", 'train (f")', 'test (f")'),
            loc='upper right',
            bbox_to_anchor=(1.2, 1.)
        )
        ax.yaxis.grid(True)
        plt.show()
Up to section $\quad$ Return to contents
# --- DWT + agglomerative clustering: output location and hyper-parameter grid ---
filename_base = 'gscv_dwt_ac_'
results_dirname = '../results/dwt/ac/'
if not os.path.isdir(results_dirname):
    os.makedirs(results_dirname)

n_clusters = len(np.unique(labels))
print('Number of cluster: %d ' % (n_clusters))

# 'wavelet_name' is deliberately absent here: the cells that use the grid set
# it themselves (per wavelet family or to the full wav_list).
param_grid = {
    'with_mean': [True, False],
    'with_std': [True, False],
    'with_abs': [True, False],
    'stsc_axis': [0, 1],
    'affinity': ['l1', 'l2', 'cosine', 'rbf'],
    'linkage': ['complete', 'average'],
    'subtract': [True, False],
    'th': ['soft', 'hard'],
    # 10 log-spaced values from 1e-5 to 1e1
    'tau': np.power(10., np.linspace(-5, 1, 10)),
    'mode': pywt.Modes.modes,
}

waveletFamilyNames = ['bior', 'coif', 'db', 'rbio', 'sym']
wav_list = [
    # biorthogonal (15)
    'bior1.1', 'bior1.3', 'bior1.5', 'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8',
    'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9', 'bior4.4', 'bior5.5',
    'bior6.8',
    # coiflets (17)
    'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6', 'coif7',
    'coif8', 'coif9', 'coif10', 'coif11', 'coif12', 'coif13', 'coif14', 'coif15',
    'coif16', 'coif17',
    # daubechies (20)
    'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8',
    'db9', 'db10', 'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17',
    'db18', 'db19', 'db20',
    # reversed biorthogonal (15)
    'rbio1.1', 'rbio1.3', 'rbio1.5',
    'rbio2.2', 'rbio2.4', 'rbio2.6', 'rbio2.8', 'rbio3.1', 'rbio3.3', 'rbio3.5',
    'rbio3.7', 'rbio3.9', 'rbio4.4', 'rbio5.5', 'rbio6.8',
    # symlets (19)
    'sym2', 'sym3',
    'sym4', 'sym5', 'sym6', 'sym7', 'sym8', 'sym9', 'sym10', 'sym11', 'sym12',
    'sym13', 'sym14', 'sym15', 'sym16', 'sym17', 'sym18', 'sym19', 'sym20'
]

# Axis lengths used later to reshape / unravel the result arrays.
Nwavfam = len(waveletFamilyNames)
Nmode = len(param_grid['mode'])
Ntau = len(param_grid['tau'])
Naffinity = len(param_grid['affinity'])
Nlinkage = len(param_grid['linkage'])

# Clustering scores and their display names, in matching order.
scoring = [
    adjusted_mutual_info_score,
    adjusted_rand_score,
    fowlkes_mallows_score
]
scoringNames = [
    'Adjusted MI',
    'Adjusted Rand',
    'Fowlkes-Mallows'
]
ari_sn_i = scoringNames.index('Adjusted Rand')

# Parameters that are looped over explicitly (one table per combination);
# parBool holds the number of values of each, in the same order.
loopParamNames = [
    'stsc_axis',
    'with_abs',
    'with_mean',
    'with_std',
    'subtract',
    'th'
]
parBool = [len(param_grid[_loop_key]) for _loop_key in loopParamNames]
ntab = np.prod(parBool)
Up to section $\quad$ Return to contents
# Unsupervised grid search for DWT + agglomerative clustering, run separately
# per wavelet family and per CV split, for the original signal and both
# derivatives. Every run is written to its own file
#   <filename_base><tag>_<wavelet family>_<split>.npz
# The 2nd-derivative file name used to omit the wavelet family, so each
# family overwrote the results of the previous one.
_ac_feature_spaces = [
    # (file tag / npz key suffix, data matrix, progress message)
    ('plain', plainT.T, 'Results are ready for plain data'),
    ('d1', dT.T, 'Results are ready for 1st derivative data'),
    ('d2', d2T.T, 'Results are ready for 2nd derivative data'),
]

for i_wn in range(len(waveletFamilyNames)):
    current_wf = waveletFamilyNames[i_wn]
    print(current_wf)
    # restrict the grid to the wavelets of the current family
    param_grid['wavelet_name'] = [
        wav_name for wav_name in wav_list if wav_name.startswith(current_wf)
    ]
    for k in range(len(custom_cv_short)):
        for space_tag, X_space, done_message in _ac_feature_spaces:
            res_dwt, axes_dwt = GridSearchCV_unsupervised(
                estimator=dwtAC(n_clusters=n_clusters, nonlin=hos_thresh),
                X=X_space, y=labels, param_grid=param_grid,
                cv=train_indices_short[k:k+1], scoring=scoring, verbose=True, nEval=1
            )
            # array names inside the archive stay res_dwt_<tag> / axes_dwt_<tag>
            np.savez_compressed(
                results_dirname + filename_base + '%s_%s_%d.npz' % (space_tag, current_wf, k),
                **{'res_dwt_' + space_tag: res_dwt, 'axes_dwt_' + space_tag: axes_dwt}
            )
            # drop the references before the next search allocates its results
            del res_dwt, axes_dwt
            print(done_message)
Up to section $\quad$ Return to contents
# Collect the per-family clustering results into one array per feature space,
# then, for every combination of the looped parameters, print the scores at
# the best grid point and store its Adjusted Rand value for the mosaic plots.
mosaic_data_ari_plain = {}
mosaic_data_ari_d1 = {}
mosaic_data_ari_d2 = {}
# legend text for the 3-bit code built from with_abs / with_mean / with_std
mosaic_data_3var_dict = {
    '7': r'$\overline{\mathrm{abs}}$/$\overline{\mathrm{mean}}$/$\overline{\mathrm{std}}$',
    '6': r'$\overline{\mathrm{abs}}$/$\overline{\mathrm{mean}}$/std',
    '5': r'$\overline{\mathrm{abs}}$/mean/$\overline{\mathrm{std}}$',
    '4': r'$\overline{\mathrm{abs}}$/mean/std',
    '3': r'abs/$\overline{\mathrm{mean}}$/$\overline{\mathrm{std}}$',
    '2': r'abs/$\overline{\mathrm{mean}}$/std',
    '1': r'abs/mean/$\overline{\mathrm{std}}$',
    '0': r'abs/mean/std'
}
objectiveScoreInd = ari_sn_i

# Load every wavelet family and join them along the 'wavelet_name' axis.
family_parts_plain = []
family_parts_d1 = []
family_parts_d2 = []
for k_wf in range(Nwavfam):
    current_wfn = waveletFamilyNames[k_wf]
    param_grid['wavelet_name'] = [
        wav_name for wav_name in wav_list if wav_name.startswith(current_wfn)
    ]
    dwt_ac_plain_loc, axesList_plain = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'plain', current_wfn, which='mean'
    )
    dwt_ac_d1_loc, axesList_d1 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd1', current_wfn, which='mean'
    )
    dwt_ac_d2_loc, axesList_d2 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd2', current_wfn, which='mean'
    )
    if k_wf == 0:
        merge_index = axesList_plain.index('wavelet_name')
    family_parts_plain.append(dwt_ac_plain_loc)
    family_parts_d1.append(dwt_ac_d1_loc)
    family_parts_d2.append(dwt_ac_d2_loc)
dwt_ac_plain = np.concatenate(family_parts_plain, axis=merge_index)
dwt_ac_d1 = np.concatenate(family_parts_d1, axis=merge_index)
dwt_ac_d2 = np.concatenate(family_parts_d2, axis=merge_index)
param_grid['wavelet_name'] = copy.deepcopy(wav_list)


def _ac_best_cell(scores, index):
    """Locate the best grid point of `scores` for one looped combination.

    `index` holds the positions of (stsc_axis, with_abs, with_mean, with_std,
    subtract, th). Returns the positions of the first maximum of the objective
    score along (affinity, linkage, mode, tau, wavelet_name) and the vector of
    all scores at that grid point.
    """
    plane = scores[
        :, :, :, index[0], index[4], :, index[5], :, index[1], index[2], index[3],
        objectiveScoreInd
    ]
    hits = np.where(plane == plane.max())
    best = [axis_hits[0] for axis_hits in hits]
    all_scores = scores[
        best[0], best[1], best[2],
        index[0], index[4],
        best[3],
        index[5],
        best[4],
        index[1], index[2], index[3], :
    ]
    return best, all_scores


def _ac_row_label(template, best):
    """Fill a row-label template with the parameter values at `best`."""
    return template % (
        param_grid['affinity'][best[0]],
        param_grid['linkage'][best[1]],
        param_grid['mode'][best[2]],
        param_grid['tau'][best[3]],
        param_grid['wavelet_name'][best[4]]
    )


for k in range(ntab):
    index = np.unravel_index(k, parBool, order='C')
    stsc_axis, with_abs, with_mean, with_std, subtract, th = [
        param_grid[loop_name][loop_pos]
        for loop_name, loop_pos in zip(loopParamNames, index)
    ]

    ind_plain, scores_ac_plain = _ac_best_cell(dwt_ac_plain, index)
    ind_d1, scores_ac_d1 = _ac_best_cell(dwt_ac_d1, index)
    ind_d2, scores_ac_d2 = _ac_best_cell(dwt_ac_d2, index)

    # (k // 4) % 8 drops subtract/th and keeps the 3-bit abs/mean/std code
    mda_ind = (stsc_axis, (k // 4) % 8, subtract, th)
    mosaic_data_ari_plain[mda_ind] = scores_ac_plain[ari_sn_i]
    mosaic_data_ari_d1[mda_ind] = scores_ac_d1[ari_sn_i]
    mosaic_data_ari_d2[mda_ind] = scores_ac_d2[ari_sn_i]

    table_rows = [
        ('original (%s, %s, %s, %.2e, %s): ', ind_plain, scores_ac_plain),
        ('1st derivative (%s, %s, %s, %.2e, %s): ', ind_d1, scores_ac_d1),
        ('2nd derivative (%s, %s, %s, %.2e, %s): ', ind_d2, scores_ac_d2),
    ]
    df_out = {}
    for i in range(len(scoringNames)):
        score_column = {}
        for row_template, row_best, row_scores in table_rows:
            score_column[_ac_row_label(row_template, row_best)] = row_scores[i]
        df_out[scoringNames[i]] = score_column

    config_string = 'stsc_axis=%d, with_abs=%d, with_mean=%d, with_std=%d, subtract=%d th=%s' % (
        stsc_axis, with_abs, with_mean, with_std, subtract, th
    )
    df_out = pd.DataFrame(df_out)
    print(config_string)
    print(df_out)
    print('')
Up to section $\quad$ Return to contents
# Stand-alone horizontal colour bar (0..1, 'hot' colormap) followed by one
# mosaic plot per feature space, all using that same colormap.
sns.set_style('white')
cmapHot = plt.get_cmap('hot')
fig = plt.figure(figsize=(8, 3))
ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.1])
norm = mpl.colors.Normalize(vmin=0., vmax=1.)
cb1 = mpl.colorbar.ColorbarBase(
    ax1, cmap=cmapHot, norm=norm, orientation='horizontal'
)
cb1.set_label('Adjusted Rand Index')
plt.show()

for mosaic_heading, mosaic_scores in (
    ('Original signal', mosaic_data_ari_plain),
    ('1st derivative', mosaic_data_ari_d1),
    ('2nd derivative', mosaic_data_ari_d2),
):
    print(mosaic_heading)
    custom_mosaic_dwt(mosaic_scores, mosaic_data_3var_dict, cmapHot)
    plt.show()
Up to section $\quad$ Return to contents
# Bar plot per wavelet family for one fixed preprocessing configuration: for
# every wavelet, fix the extension mode that is best for the original signal
# and show the best score of f, f' and f" with that mode.
# subset of the best performing parameters
th = 'soft'
with_abs = False
with_mean = False
with_std = True
subtract = True
stsc_axis = 1
ind_th = param_grid['th'].index(th)
ind_abs = param_grid['with_abs'].index(with_abs)
ind_mean = param_grid['with_mean'].index(with_mean)
ind_std = param_grid['with_std'].index(with_std)
ind_stsc = param_grid['stsc_axis'].index(stsc_axis)
ind_sub = param_grid['subtract'].index(subtract)
ind_sc = 1  # position of the plotted score on the last ('score') axis
param_grid['wavelet_name'] = copy.deepcopy(wav_list)

# shortened tick labels for the long mode names
_ac_mode_abbrev = {
    'periodization': 'periodiz. ',
    'symmetric': 'symmet. ',
    'constant': 'const. ',
}

for i_wf in range(len(waveletFamilyNames)):
    current_wfn = waveletFamilyNames[i_wf]
    currentWaveletNames = [
        wav_name for wav_name in param_grid['wavelet_name']
        if wav_name.startswith(current_wfn)
    ]
    dwt_ac_plain_loc, axesList_plain = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'plain', current_wfn, which='mean'
    )
    dwt_ac_d1_loc, axesList_d1 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd1', current_wfn, which='mean'
    )
    dwt_ac_d2_loc, axesList_d2 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd2', current_wfn, which='mean'
    )
    # axes after stacking: feature space, then
    # ['affinity', 'linkage', 'mode', 'stsc_axis', 'subtract',
    #  'tau', 'th', 'wavelet_name', 'with_abs', 'with_mean', 'with_std', 'score']
    dwt_ac_all = np.stack([dwt_ac_plain_loc, dwt_ac_d1_loc, dwt_ac_d2_loc], axis=0)
    # fix the chosen configuration and score
    dwt_ac_all = dwt_ac_all[
        :, :, :, :, ind_stsc, ind_sub, :, ind_th, :, ind_abs, ind_mean, ind_std, ind_sc
    ]
    # -> (feature space, affinity, linkage, mode, tau, wavelet)
    dwt_ac_all = np.reshape(dwt_ac_all, [3, Naffinity, Nlinkage, Nmode, Ntau, -1], order='C')
    n_wav = dwt_ac_all.shape[-1]
    wav_idx = np.arange(n_wav)

    # original signal: best setting over everything, per wavelet
    flat_all = np.reshape(dwt_ac_all, [3, Naffinity * Nlinkage * Nmode * Ntau, n_wav], order='C')
    ind = np.argmax(flat_all[0], axis=0)
    acc_plain = flat_all[0, ind, wav_idx]
    ind = np.unravel_index(ind, [Naffinity, Nlinkage, Nmode, Ntau], order='C')
    best_mode = ind[2]  # one mode index per wavelet

    # Pair wavelet i with its own mode best_mode[i], working on the sliced
    # array. The unsliced per-family arrays must not be indexed here: that
    # ignored the fixed configuration above and mixed scores and settings.
    # With both index arrays given, NumPy puts the paired axis first:
    # (n_wav, 3, affinity, linkage, tau) -> (3, affinity, linkage, tau, n_wav).
    mode_fixed = np.transpose(
        dwt_ac_all[:, :, :, best_mode, :, wav_idx], [1, 2, 3, 4, 0]
    )
    mode_fixed = np.reshape(mode_fixed, [3, Naffinity * Nlinkage * Ntau, n_wav], order='C')
    dwt_ac_plain = mode_fixed[0]
    dwt_ac_d1 = mode_fixed[1]
    dwt_ac_d2 = mode_fixed[2]
    # derivatives: best (affinity, linkage, tau) with the mode fixed as above
    ind2 = np.argmax(dwt_ac_d1, axis=0)
    ind3 = np.argmax(dwt_ac_d2, axis=0)
    acc_d1 = dwt_ac_d1[ind2, wav_idx]
    acc_d2 = dwt_ac_d2[ind3, wav_idx]

    f, ax = plt.subplots(figsize=(1.35 * 10, 2))
    barwidth = 0.2
    x = np.arange(len(currentWaveletNames))
    b1a = ax.bar(
        x, acc_plain, width=barwidth, color=barplot_palette[0], alpha=0.8
    )
    b1b = ax.bar(
        x + barwidth, acc_d1, width=barwidth, color=barplot_palette[2], alpha=0.8
    )
    b1c = ax.bar(
        x + 2 * barwidth, acc_d2, width=barwidth, color=barplot_palette[4], alpha=0.8
    )

    localModeNames = [
        _ac_mode_abbrev.get(param_grid['mode'][m], param_grid['mode'][m])
        for m in best_mode
    ]
    xticks = [
        wav_name + '\n' + mode_name
        for wav_name, mode_name in zip(currentWaveletNames, localModeNames)
    ]
    ax.set_ylim(0., 0.62)
    ax.set_xticks(x + barwidth)
    ax.set_xticklabels(xticks)
    ax.legend(
        (b1a[0], b1b[0], b1c[0]),
        ('f', "f'", 'f"'),
        loc='upper right',
        bbox_to_anchor=(1.08, 1.)
    )
    ax.yaxis.grid(True)
    plt.show()
Up to section $\quad$ Return to contents
# subset of the best performing parameters
#th = 'soft'
#with_abs = False
#with_mean = False
#with_std = True
#subtract = True
#stsc_axis = 1
#ind_th = param_grid['th'].index(th)
#ind_abs = param_grid['with_abs'].index(with_abs)
#ind_mean = param_grid['with_mean'].index(with_mean)
#ind_std = param_grid['with_std'].index(with_std)
#ind_stsc = param_grid['stsc_axis'].index(stsc_axis)
#ind_sub = param_grid['subtract'].index(subtract)
# NOTE: the variant that sliced the results at the fixed indices above
# (ind_stsc, ind_sub, ind_th, ind_abs, ind_mean, ind_std) is disabled; the
# plots below take the maximum over every parameter except mode and wavelet.


def _scores_by_mode_and_wavelet(results, score_index, n_mode, n_wavelets):
    """Return `results` as a (combinations, mode, wavelet) score table.

    Picks entry `score_index` on the last axis, moves axes 2 and 7 of the
    remaining eleven to the end and flattens all other axes into the
    leading one (C order).

    Assumes the axis order
    ['affinity', 'linkage', 'mode', 'stsc_axis', 'subtract', 'tau', 'th',
     'wavelet_name', 'with_abs', 'with_mean', 'with_std', 'score']
    -- TODO confirm against npz2vec3_fromDirectory.
    """
    picked = results[..., score_index]
    reordered = np.transpose(picked, [0, 1, 3, 4, 5, 6, 8, 9, 10, 2, 7])
    return np.reshape(reordered, [-1, n_mode, n_wavelets], order='C')


# Position of the plotted score on the trailing 'score' axis.
ind_sc = 1
param_grid['wavelet_name'] = copy.deepcopy(wav_list)

# Loop-invariant plot settings.
barwidth = 0.2
x = np.arange(Nmode)
# Built as a real list (not a lazy `map`) so it is valid tick-label input on
# Python 3 as well.
localModeNames = [
    'periodiz. ' if mode_name == 'periodization' else mode_name
    for mode_name in param_grid['mode']
]

for i_wf, current_wfn in enumerate(waveletFamilyNames):
    # Boolean mask of the wavelets that belong to the current family; an
    # explicit bool array keeps the masking valid when `map` is lazy.
    currentWaveletNamesInd = np.array(
        [name.startswith(current_wfn) for name in wav_list], dtype=bool
    )
    currentWaveletNames = np.array(param_grid['wavelet_name'])[currentWaveletNamesInd]
    Nwav_current = len(currentWaveletNames)

    dwt_ac_plain, axesList_plain = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'plain', current_wfn, which='mean'
    )
    dwt_ac_d1, axesList_d1 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd1', current_wfn, which='mean'
    )
    dwt_ac_d2, axesList_d2 = npz2vec3_fromDirectory(
        results_dirname, filename_base, 'd2', current_wfn, which='mean'
    )
    #(3, 4, 2, 7, 2, 2, 10, 2, 20, 2, 2, 2, 3)
    #['affinity', 'linkage', 'mode', 'stsc_axis', 'subtract',
    #'tau', 'th', 'wavelet_name', 'with_abs', 'with_mean', 'with_std', 'score']
    # NOTE(review): the shape comment lists 13 extents for 12 axis names, and
    # the transpose below requires 11 axes after the score axis is dropped.
    dwt_ac_plain = _scores_by_mode_and_wavelet(dwt_ac_plain, ind_sc, Nmode, Nwav_current)
    dwt_ac_d1 = _scores_by_mode_and_wavelet(dwt_ac_d1, ind_sc, Nmode, Nwav_current)
    dwt_ac_d2 = _scores_by_mode_and_wavelet(dwt_ac_d2, ind_sc, Nmode, Nwav_current)

    # One figure per wavelet: for every mode, three bars (plain, d1, d2).
    for i_wn, currentWaveletName in enumerate(currentWaveletNames):
        # Single-argument print call: same output on Python 2 and 3.
        print('\t ======= %s' % (currentWaveletName))
        f, ax = plt.subplots(figsize=(1.35*Nmode, 3))
        bars = []
        for k, scores in enumerate((dwt_ac_plain, dwt_ac_d1, dwt_ac_d2)):
            # Best value over all flattened parameter combinations, per mode
            # (same values as argmax followed by a gather).
            ari_res = np.max(scores[:, :, i_wn], axis=0)
            bars.append(ax.bar(
                x + k*barwidth, ari_res, width=barwidth,
                color=barplot_palette[2*k], alpha=0.8
            ))
        ax.set_ylim(0., 0.6)
        # Ticks sit under the middle bar of every group of three.
        ax.set_xticks(x+barwidth)
        ax.set_xticklabels(localModeNames)
        ax.legend(
            tuple(bar_group[0] for bar_group in bars),
            ('f', "f'", 'f"'),
            loc='upper right',
            bbox_to_anchor=(1.2, 1.)
        )
        ax.yaxis.grid(True)
        plt.show()